#####################################
## "Backyard politics in Foreign Aid"
## William Christiansen
## Tobias Heinrich
## Timothy Peterson
#####################################

## File that checks balance and provides
## descriptive statistics


## Check balance and randomization
##################################
load("output/data_d1.Rdata")
n <- nrow(d1)

## Inspect randomizations
round(table(d1$Treatment)/n, di=2)

round(table(d1$Treatment, d1$Costs)/n, di=2)
round(table(d1$Treatment, d1$Change)/n, di=2)


## Balance
##########
smds <- c()
nam <- c("Local, no Senator mentioned", "Local, Senator mentioned")
for(i in 1:2)
{
  for(j in 1:2)
  {
    tmp <- xBalance(fmla=formula(paste0("T", i, " ~ 1 + Age + Gender + Ideology + 
           + Education_hi + Education_lo + GreatestCountry + HousingMed + HousingPctIncrease + Transnational")),
                    data=subset(d1, Change == ifelse(j == 1, "Increase", "Cut") & Treatment %in% c(0, i)))
    smds <- rbind(smds,
                  data.frame(Variable=dimnames(tmp$results)$vars,
                             ZScore=tmp$results[,2,1],
                             Comparison=nam[i], Change=ifelse(j == 1, "Cut", "Increase")))
  }
}
smds$PrettyVar <- pretty_var_name(i=as.character(smds$Variable))

## Graph
########
g <- ggplot(data=smds, aes(x=PrettyVar, y=ZScore, group=Change, shape=Change))
g <- g + coord_flip() + geom_hline(yintercept=0, size=1)
g <- g + geom_hline(yintercept=c(-1.96, 1.96), size=.4, colour="gray40")
g <- g + geom_point(size=2.5, position=position_dodge(width=.38)) + xlab("")
g <- g + facet_grid(. ~ Comparison) + theme_bw()
g <- g + ylab("Z score of standardized differences in means")
g <- g + ggtitle("Balance across treatments")
g <- g + scale_y_continuous(limits=c(min(c(-2.05, min(smds$ZScore))), 
                                     max(c(2.05, max(smds$ZScore)))), 
                            breaks=c(-1.96, 0, 1.96))
g <- g + theme(axis.text = element_text(size=rel(.725)),
               panel.grid.major.y = element_line(size=0.25, colour="grey80", 
                                                 linetype="dashed"),
               panel.grid.major.x = element_blank(),               
               panel.grid.minor = element_blank(),
               axis.ticks = element_blank(),
               strip.text = element_text(size=rel(1.0), hjust=0, vjust=1),
               strip.background = element_blank(),
               plot.title = element_text(size=rel(1.2), face="bold", hjust=0))
ggsave("output/R-Balance.pdf", width=11, height=5)



## Geographic distance between Local and non-local
##################################################
nimby <- read.csv("data/MTurk data.csv", sep="|")
nimby <- subset(nimby, user_state %in% c("NJ", "CA", "FL", "MA", "MD", "NC", "NY", "TX", "VA"))
nimby <- nimby[, c("userlocation", "user_zip", "ncity_lon_2", "ncity_lat_2", "isLocalRecord_2")]
tmp <- str_split(nimby$userlocation, pattern=",", n=3)
nimby$user_lat <- nimby$user_lon <- 0
for(i in 1:nrow(nimby))
{
  nimby$user_lon[i] <- as.numeric(tmp[[i]][1])
  nimby$user_lat[i] <- as.numeric(tmp[[i]][2])
}  
nimby$userlocation <- NULL
zipcode$user_zip <- as.numeric(zipcode$zip)
nimby <- merge(nimby, zipcode[, c(4, 5, 6)], by="user_zip", all.x=T)
nimby$user_zip <- NULL
colnames(nimby) <- c("city_lon", "city_lat", "Local", "user_ip_lon", "user_ip_lat", "user_zip_lat", "user_zip_lon")
nimby$Distance_zip <- 0
nimby$Distance_ip <- 0

for(i in 1:nrow(nimby))
{
  nimby$Distance_ip[i] <- distHaversine(p1=c(nimby$city_lon[i], nimby$city_lat[i]),
                                        p2=c(nimby$user_ip_lon[i], nimby$user_ip_lat[i]),
                                        r=6378137/1000)
  nimby$Distance_zip[i] <- distHaversine(p1=c(nimby$city_lon[i], nimby$city_lat[i]),
                                         p2=c(nimby$user_zip_lon[i], nimby$user_zip_lat[i]),
                                         r=6378137/1000)
}

nimby3 <- adply(.data=1:nrow(nimby), .margins=1,
                .fun=function(x) data.frame(rbind(as.matrix(nimby[x, c("Local", "Distance_ip")]),
                                                  as.matrix(nimby[x, c("Local", "Distance_zip")]))))
colnames(nimby3) <- c("X", "Local", "Distance")
nimby3$What <- rep(c("By IP address", "By ZIP code"), nrow(nimby))
nimby3$Local2 <- ifelse(nimby3$Local == 1, "Local", "Not local")
km <- unit_format(unit = "km", scale = 1, digits = 0)
g <- ggplot(data=nimby3, aes(x=Distance, y=..count..))
g <- g + geom_histogram(bins=50) 
g <- g + facet_grid(What ~ Local2) 
g <- g + scale_y_continuous(breaks=NULL)
g <- g + theme_bw() + scale_x_log10(labels=trans_format(trans="identity", format=km),
                                    breaks=10^c(1, 2, 3, 4)) + xlab("Distance")
g <- g + ylab("") + ggtitle("Distances between respondent and mentioned city")
g <- g + theme(axis.text = element_text(size=rel(.625)),
               panel.grid.major.y = element_line(size=0.25, colour="grey80", 
                                                 linetype="dashed"),
               panel.grid.major.x = element_line(size=0.25, colour="grey80", 
                                                 linetype="dashed"),               
               panel.grid.minor = element_blank(),
               axis.ticks = element_blank(),
               strip.text = element_text(size=rel(1.0), hjust=0, vjust=1),
               strip.background = element_blank(),
               plot.title = element_text(size=rel(1.2), face="bold", hjust=0))
ggsave("output/R-LocationHistograms.pdf", width=11, height=5)


## Make maps, Distances
#######################
nimby$Distance_ip <- NULL 
nimby$Distance_zip <- NULL 
nimby2 <- adply(.data=1:nrow(nimby), .margins=1,
                .fun=function(x) data.frame(rbind(as.matrix(nimby[x, c("city_lon", "city_lat", "Local", "user_ip_lat", "user_ip_lon")]),
                                                  as.matrix(nimby[x, c("city_lon", "city_lat", "Local", "user_zip_lat", "user_zip_lon")]))))
colnames(nimby2) <- c("X", "city_lon", "city_lat", "Local", "user_lat", "user_lon")
nimby2$What <- rep(c("By IP address", "By ZIP code"), nrow(nimby))
nimby2$Local2 <- ifelse(nimby2$Local == 1, "Local", "Not local")

g <- ggmap(get_stamenmap(bbox=c(left=-129.5, bottom=23.6, right=-67, top=50.6), zoom=4,
                         maptype="toner-lite", color="bw"))
g <- g + geom_segment(data=nimby2, aes(xend=city_lon, yend=city_lat, x=user_lon, y=user_lat),
                      size=0.2, alpha=.5, colour="gray20", arrow=arrow(angle=30, ends="last", length=unit(0.07, "inches")))
g <- g + facet_grid(What ~ Local2) + ggtitle("Locations of respondent's and mentioned cities")
g <- g + xlab("Longitude") + ylab("Latitude") + theme_bw()
g <- g + theme(axis.text = element_text(size=rel(.625)),
               panel.grid.major.y = element_line(size=0.25, colour="grey80", 
                                                 linetype="dashed"),
               panel.grid.major.x = element_blank(),               
               panel.grid.minor = element_blank(),
               axis.ticks = element_blank(),
               strip.text = element_text(size=rel(1.0), hjust=0, vjust=1),
               strip.background = element_blank(),
               plot.title = element_text(size=rel(1.2), face="bold", hjust=0))
ggsave("output/R-LocationPairsMap.pdf", width=11, height=7)

## Make maps, locations
########################
g <- ggmap(get_stamenmap(bbox=c(left=-129.5, bottom=23.6, right=-67, top=50.6), zoom=4,
                         maptype="toner-lite", color="bw"))
g <- g + geom_point(data=nimby2, aes(x=user_lon, y=user_lat),
                    size=5, fill="black", colour="black", alpha=1/10)
g <- g + facet_grid(. ~ What)
g <- g + xlab("Longitude") + ylab("Latitude") + theme_bw()
g <- g + theme(axis.text = element_text(size=rel(.625)),
               panel.grid.major.y = element_line(size=0.25, colour="grey80", 
                                                 linetype="dashed"),
               panel.grid.major.x = element_blank(),               
               panel.grid.minor = element_blank(),
               axis.ticks = element_blank(),
               strip.text = element_text(size=rel(1.0), hjust=0, vjust=1),
               strip.background = element_blank(),
               plot.title = element_text(size=rel(1.2), face="bold", hjust=0))
ggsave("output/R-LocationUsersMap.pdf", width=10, height=4)

